Author : Fletcher McConnell
Repository: https://github.com/fletcher-m/land-cover-classification
In this repository, I create a land classification map of Southern Santa Barbara county, from data collected from landsat5 satellite and filtered through a decision tree.
Raster manipulation with terra
Geospatial data wrangling with sf
Making a decision tree with rpart
Visualizing land cover with Tmap
In this R markdown document, I use data from the landsat5 satellite https://www.usgs.gov/landsat-missions/landsat-5. This data contains one scene from September 25, 2017. It contains bands 1, 2, 3, 4, 5, 7 (green, blue, red, NIR, SWIR1, SWIR2). The other data used are shapefiles with one containing a polygon representing Southern Santa Barbara County (“SB_county_south_shp”) and the other represents polygons of training sites (“trainingdata.shp”).
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(terra)
## terra 1.7.55
library(here)
## here() starts at /Users/fletchermcconnell/Documents/EDS 223/land_cover_classification
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:terra':
##
## intersect, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(rpart)
library(rpart.plot)
library(tmap)
##
## Attaching package: 'tmap'
## The following object is masked from 'package:datasets':
##
## rivers
rm(list = ls())
# list files for each band, including the full file path
filelist <- list.files("/Users/fletchermcconnell/Documents/EDS 223/land_cover_classification/landsat-data", full.names = TRUE)
# read in and store as a raster stack
landsat_20070925 <- rast(filelist)
# update layer names to match band
names(landsat_20070925) <- c("blue", "green", "red", "NIR", "SWIR1", "SWIR2")
# plot true color image
plotRGB(landsat_20070925, r = 3, g = 2, b = 1, stretch = "lin")
# read in shapefile for southern portion of SB county
SB_county_south <- st_read("/Users/fletchermcconnell/Documents/EDS 223/land_cover_classification/data (4)/SB_county_south.shp")
# project to match the Landsat data
SB_county_south <- st_transform(SB_county_south, crs = crs(landsat_20070925))
# crop Landsat scene to the extent of the SB county shapefile
landsat_cropped <- crop(landsat_20070925, SB_county_south)
# mask the raster to southern portion of SB county
landsat_masked <- mask(landsat_cropped, SB_county_south)
# remove unnecessary object from environment
rm(landsat_20070925, SB_county_south, landsat_cropped)
# reclassify erroneous values as NA
rcl <- matrix(c(-Inf, 7273, NA,
43636, Inf, NA), ncol = 3, byrow = TRUE)
landsat <- classify(landsat_masked, rcl = rcl)
# adjust values based on scaling factor
landsat <- (landsat * 0.0000275 - 0.2) * 100
# plot true color image to check results
plotRGB(landsat, r = 3, g = 2, b = 1, stretch = "lin")
# check values are 0 - 100
summary(landsat)
## blue green red NIR
## Min. : 1.11 Min. : 0.74 Min. : 0.00 Min. : 0.23
## 1st Qu.: 2.49 1st Qu.: 2.17 1st Qu.: 1.08 1st Qu.: 0.75
## Median : 3.06 Median : 4.59 Median : 4.45 Median :14.39
## Mean : 3.83 Mean : 5.02 Mean : 4.92 Mean :11.52
## 3rd Qu.: 4.63 3rd Qu.: 6.76 3rd Qu.: 7.40 3rd Qu.:19.34
## Max. :39.42 Max. :53.32 Max. :56.68 Max. :57.08
## NA's :39856 NA's :39855 NA's :39855 NA's :39856
## SWIR1 SWIR2
## Min. : 0.10 Min. : 0.20
## 1st Qu.: 0.41 1st Qu.: 0.60
## Median :13.43 Median : 8.15
## Mean :11.88 Mean : 8.52
## 3rd Qu.:18.70 3rd Qu.:13.07
## Max. :49.13 Max. :48.07
## NA's :42892 NA's :46809
# read in and transform training data
training_data <- st_read("/Users/fletchermcconnell/Documents/EDS 223/land_cover_classification/data (4)/trainingdata.shp") %>%
st_transform(., crs = crs(landsat))
# extract reflectance values at training sites
training_data_values <- extract(landsat, training_data, df = TRUE)
# convert training data to data frame
training_data_attributes <- training_data %>%
st_drop_geometry()
# join training data attributes and extracted reflectance values
SB_training_data <- left_join(training_data_values, training_data_attributes,
by = c("ID" = "id")) %>%
mutate(type = as.factor(type)) # convert landcover type to factor
# establish model formula
SB_formula <- type ~ red + green + blue + NIR + SWIR1 + SWIR2
# train decision tree
SB_decision_tree <- rpart(formula = SB_formula,
data = SB_training_data,
method = "class",
na.action = na.omit)
# plot decision tree
prp(SB_decision_tree)
# classify image based on decision tree
SB_classification <- predict(landsat, SB_decision_tree, type = "class", na.rm = TRUE)
# inspect level to understand the order of classes in prediction
levels(SB_training_data$type)
## [1] "green_vegetation" "soil_dead_grass" "urban" "water"
# plot results
tm_shape(SB_classification) +
tm_raster(col.scale = tm_scale_categorical(values = c("#8DB580", "#F2DDA4", "#7E8987", "#6A8EAE")),
col.legend = tm_legend(labels = c("green vegetation", "soil/dead grass", "urban", "water"),
title = "Landcover type")) +
tm_layout(legend.position = c("left", "bottom"))
## SpatRaster object downsampled to 621 by 1612 cells.